//===--- ManglingUtils.h - Utilities for Swift name mangling ----*- C++ -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

#ifndef SWIFT_DEMANGLING_MANGLINGUTILS_H
#define SWIFT_DEMANGLING_MANGLINGUTILS_H

#include "StringRef.h"
#include "Punycode.h"

namespace swift {
    namespace Mangle {
        
        using llvm::StringRef;
        
        inline bool isLowerLetter(char ch) {
            return ch >= 'a' && ch <= 'z';
        }
        
        inline bool isUpperLetter(char ch) {
            return ch >= 'A' && ch <= 'Z';
        }
        
        inline bool isDigit(char ch) {
            return ch >= '0' && ch <= '9';
        }
        
        inline bool isLetter(char ch) {
            return isLowerLetter(ch) || isUpperLetter(ch);
        }
        
        /// Returns true if \p ch is a character which defines the begin of a
        /// substitution word.
        inline bool isWordStart(char ch) {
            return !isDigit(ch) && ch != '_' && ch != 0;
        }
        
        /// Returns true if \p ch is a character (following \p prevCh) which defines
        /// the end of a substitution word.
        inline bool isWordEnd(char ch, char prevCh) {
            if (ch == '_' || ch == 0)
                return true;
            
            if (!isUpperLetter(prevCh) && isUpperLetter(ch))
                return true;
            
            return false;
        }
        
        /// Returns true if \p ch is a valid character which may appear in a symbol
        /// mangling.
        inline bool isValidSymbolChar(char ch) {
            return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$';
        }
        
        /// Returns true if \p str contains any character which may not appear in a
        /// mangled symbol string and therefore must be punycode encoded.
        bool needsPunycodeEncoding(StringRef str);
        
        /// Returns true if \p str contains any non-ASCII character.
        bool isNonAscii(StringRef str);
        
        /// Describes a Word in a mangled identifier.
        struct SubstitutionWord {
            
            /// The position of the first word character in the mangled string.
            size_t start;
            
            /// The length of the word.
            size_t length;
        };
        
        /// Helper struct which represents a word substitution.
        struct WordReplacement {
            /// The position in the identifier where the word is substituted.
            size_t StringPos;
            
            /// The index into the mangler's Words array (-1 if invalid).
            int WordIdx;
        };
        
        /// Translate the given operator character into its mangled form.
        ///
        /// Current operator characters:   @/=-+*%<>!&|^~ and the special operator '..'
        char translateOperatorChar(char op);
        
        /// Returns a string where all characters of the operator \p Op are translated
        /// to their mangled form.
        std::string translateOperator(StringRef Op);
        
        /// Returns the standard type kind for an 'S' substitution, e.g. 'i' for "Int".
        char getStandardTypeSubst(StringRef TypeName);
        
        /// Mangles an identifier using a generic Mangler class.
        ///
        /// The Mangler class must provide the following:
        /// *) Words: An array of SubstitutionWord which holds the current list of
        ///           found words which can be used for substitutions.
        /// *) SubstWordsInIdent: An array of WordReplacement, which is just used
        ///           as a temporary storage during mangling. Must be empty.
        /// *) Buffer: A stream where the mangled identifier is written to.
        /// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
        /// *) UsePunycode: A flag indicating if punycode encoding should be done.
        template <typename Mangler>
        void mangleIdentifier(Mangler &M, StringRef ident) {
            
            size_t WordsInBuffer = M.Words.size();
            assert(M.SubstWordsInIdent.empty());
            if (M.UsePunycode && needsPunycodeEncoding(ident)) {
                // If the identifier contains non-ASCII character, we mangle
                // with an initial '00' and Punycode the identifier string.
                std::string punycodeBuf;
                Punycode::encodePunycodeUTF8(ident, punycodeBuf,
                                             /*mapNonSymbolChars*/ true);
                StringRef pcIdent = punycodeBuf;
                M.Buffer << "00" << pcIdent.size();
                if (isDigit(pcIdent[0]) || pcIdent[0] == '_')
                    M.Buffer << '_';
                M.Buffer << pcIdent;
                return;
            }
            // Search for word substitutions and for new words.
            const size_t NotInsideWord = ~0;
            size_t wordStartPos = NotInsideWord;
            for (size_t Pos = 0, Len = ident.size(); Pos <= Len; ++Pos) {
                char ch = (Pos < Len ? ident[Pos] : 0);
                if (wordStartPos != NotInsideWord && isWordEnd(ch, ident[Pos - 1])) {
                    // This position is the end of a word, i.e. the next character after a
                    // word.
                    assert(Pos > wordStartPos);
                    size_t wordLen = Pos - wordStartPos;
                    StringRef Word = ident.substr(wordStartPos, wordLen);
                    
                    // Helper function to lookup the Word in a string.
                    auto lookupWord = [&] (StringRef Str,
                                           size_t FromWordIdx, size_t ToWordIdx) -> int {
                        for (size_t Idx = FromWordIdx; Idx < ToWordIdx; ++Idx) {
                            const SubstitutionWord &w = M.Words[Idx];
                            StringRef existingWord =  Str.substr(w.start, w.length);
                            if (Word == existingWord)
                                return (int)Idx;
                        }
                        return -1;
                    };
                    
                    // Is the word already present in the so far mangled string?
                    int WordIdx = lookupWord(M.getBufferStr(), 0, WordsInBuffer);
                    // Otherwise, is the word already present in this identifier?
                    if (WordIdx < 0)
                        WordIdx = lookupWord(ident, WordsInBuffer, M.Words.size());
                    
                    if (WordIdx >= 0) {
                        // We found a word substitution!
                        assert(WordIdx < 26);
                        M.SubstWordsInIdent.push_back({wordStartPos, WordIdx});
                    } else if (wordLen >= 2 && M.Words.size() < M.MaxNumWords) {
                        // It's a new word: remember it.
                        // Note: at this time the word's start position is relative to the
                        // begin of the identifier. We must update it afterwards so that it is
                        // relative to the begin of the whole mangled Buffer.
                        M.Words.push_back({wordStartPos, wordLen});
                    }
                    wordStartPos = NotInsideWord;
                }
                if (wordStartPos == NotInsideWord && isWordStart(ch)) {
                    // This position is the begin of a word.
                    wordStartPos = Pos;
                }
            }
            // If we have word substitutions mangle an initial '0'.
            if (!M.SubstWordsInIdent.empty())
                M.Buffer << '0';
            
            size_t Pos = 0;
            // Add a dummy-word at the end of the list.
            M.SubstWordsInIdent.push_back({ident.size(), -1});
            
            // Mangle a sequence of word substitutions and sub-strings.
            for (size_t Idx = 0, End = M.SubstWordsInIdent.size(); Idx < End; ++Idx) {
                const WordReplacement &Repl = M.SubstWordsInIdent[Idx];
                if (Pos < Repl.StringPos) {
                    // Mangle the sub-string up to the next word substitution (or to the end
                    // of the identifier - that's why we added the dummy-word).
                    // The first thing: we add the encoded sub-string length.
                    M.Buffer << (Repl.StringPos - Pos);
                    assert(!isDigit(ident[Pos]) &&
                           "first char of sub-string may not be a digit");
                    do {
                        // Update the start position of new added words, so that they refer to
                        // the begin of the whole mangled Buffer.
                        if (WordsInBuffer < M.Words.size() &&
                            M.Words[WordsInBuffer].start == Pos) {
                            M.Words[WordsInBuffer].start = M.getBufferStr().size();
                            WordsInBuffer++;
                        }
                        // Add a literal character of the sub-string.
                        M.Buffer << ident[Pos];
                        Pos++;
                    } while (Pos < Repl.StringPos);
                }
                // Is it a "real" word substitution (and not the dummy-word)?
                if (Repl.WordIdx >= 0) {
                    assert(Repl.WordIdx <= (int)WordsInBuffer);
                    Pos += M.Words[Repl.WordIdx].length;
                    if (Idx < End - 2) {
                        M.Buffer << (char)(Repl.WordIdx + 'a');
                    } else {
                        // The last word substitution is a capital letter.
                        M.Buffer << (char)(Repl.WordIdx + 'A');
                        if (Pos == ident.size())
                            M.Buffer << '0';
                    }
                }
            }
            M.SubstWordsInIdent.clear();
        }
        
        /// Utility class for mangling merged substitutions.
        ///
        /// Used in the Mangler and Remangler.
        class SubstitutionMerging {
            
            /// The position of the last substitution mangling,
            /// e.g. 3 for 'AabC' and 'Aab4C'
            size_t lastSubstPosition = 0;
            
            /// The size of the last substitution mangling,
            /// e.g. 1 for 'AabC' or 2 for 'Aab4C'
            size_t lastSubstSize = 0;
            
            /// The repeat count of the last substitution,
            /// e.g. 1 for 'AabC' or 4 for 'Aab4C'
            size_t lastNumSubsts = 0;
            
            /// True if the last substitution is an 'S' substitution,
            /// false if the last substitution is an 'A' substitution.
            bool lastSubstIsStandardSubst = false;
            
        public:
            
            // The only reason to limit the number of repeated substitutions is that we
            // don't want that the demangler blows up on a bogus substitution, e.g.
            // ...A832456823746582B...
            enum { MaxRepeatCount = 2048 };
            
            void clear() {
                lastNumSubsts = 0;
            }
            
            /// Tries to merge the substitution \p Subst with a previously mangled
            /// substitution.
            ///
            /// Returns true on success. In case of false, the caller must mangle the
            /// substitution separately in the form 'S<Subst>' or 'A<Subst>'.
            ///
            /// The Mangler class must provide the following:
            /// *) Buffer: A stream where the mangled identifier is written to.
            /// *) getBufferStr(): Returns a StringRef of the current content of Buffer.
            /// *) resetBuffer(size_t): Resets the buffer to an old position.
            template <typename Mangler>
            bool tryMergeSubst(Mangler &M, char Subst, bool isStandardSubst) {
                assert(isUpperLetter(Subst) || (isStandardSubst && isLowerLetter(Subst)));
                StringRef BufferStr = M.getBufferStr();
                if (lastNumSubsts > 0 && lastNumSubsts < MaxRepeatCount
                    && BufferStr.size() == lastSubstPosition + lastSubstSize
                    && lastSubstIsStandardSubst == isStandardSubst) {
                    
                    // The last mangled thing is a substitution.
                    assert(lastSubstPosition > 0 && lastSubstPosition < BufferStr.size());
                    assert(lastSubstSize > 0);
                    char lastSubst = BufferStr.back();
                    assert(isUpperLetter(lastSubst)
                           || (isStandardSubst && isLowerLetter(lastSubst)));
                    if (lastSubst != Subst && !isStandardSubst) {
                        // We can merge with a different 'A' substitution,
                        // e.g. 'AB' -> 'AbC'.
                        lastSubstPosition = BufferStr.size();
                        lastNumSubsts = 1;
                        M.resetBuffer(BufferStr.size() - 1);
                        assert(isUpperLetter(lastSubst));
                        M.Buffer << (char)(lastSubst - 'A' + 'a') << Subst;
                        lastSubstSize = 1;
                        return true;
                    }
                    if (lastSubst == Subst) {
                        // We can merge with the same 'A' or 'S' substitution,
                        // e.g. 'AB' -> 'A2B', or 'S3i' -> 'S4i'
                        lastNumSubsts++;
                        M.resetBuffer(lastSubstPosition);
                        M.Buffer << lastNumSubsts;
                        M.Buffer << Subst;
                        lastSubstSize = M.getBufferStr().size() - lastSubstPosition;
                        return true;
                    }
                }
                // We can't merge with the previous substitution, but let's remember this
                // substitution which will be mangled by the caller.
                lastSubstPosition = BufferStr.size() + 1;
                lastSubstSize = 1;
                lastNumSubsts = 1;
                lastSubstIsStandardSubst = isStandardSubst;
                return false;
            }
        };
        
    } // end namespace Mangle
} // end namespace swift

#endif // SWIFT_DEMANGLING_MANGLINGUTILS_H
